# Vito D'Orazio
# September 15, 2020
# zerogrids.R

# this script reads in pgm dv data
# it identifies the grids with 0 conflict
# it counts the number of conflict months for all grids 
# writes out a grid-level dataset with a counter for months of observed conflict and the max conflict span for each grid

# Start from an empty workspace so nothing left over from an earlier session
# leaks into this run.
rm(list = ls())

library(parsedate) # for date formatting
library(data.table) # for lags and leads

# NOTE(review): machine-specific absolute path; every relative path below is
# resolved against it.
setwd("/Users/vjdorazio/PycharmProjects/views")

# Grid-month dependent-variable data.
mydata <- read.csv("data072820/pgm_africa_dvsub.csv")

# Month lookup table; only the id and its partition label are needed.
monthid <- read.csv("data072820/monthids.csv")
monthid <- monthid[, c("month_id", "partition")]

# Left join: keep every row of mydata and attach its partition label.
mydata <- merge(mydata, monthid, by = "month_id", all.x = TRUE)

# Keep only the train and validation partitions (1990-2015 per the original
# author); rows with any other label, or no label, are dropped.
mydata <- mydata[mydata$partition %in% c("train", "validate"), ]

# Sort by grid, then by month within grid, so that each grid's rows are
# contiguous and in time order.
mydata <- mydata[with(mydata, order(pg_id, month_id)), ]

# Summary counts for the filtered data. The numbers in the trailing comments
# are the values the original author recorded for this data set.
length(unique(mydata$pg_id)) # 10677
length(which(mydata$ged_best_sb > 0))  # 6990

# flag = 1 for a grid-month with ged_best_sb > 0, otherwise 0
# (rows where ged_best_sb is NA stay 0).
mydata$flag <- 0
mydata$flag[which(mydata$ged_best_sb > 0)] <- 1

# Lengths of every run of consecutive conflict rows, computed separately for
# each grid. Running rle() over the whole flag column at once would glue a run
# that ends on the last row of one grid to a run that starts on the first row
# of the next grid, overstating span lengths and undercounting spans.
# Relies on rows being in month order within each grid (sorted earlier in this
# script).
# NOTE(review): consecutive rows are treated as consecutive months; confirm
# that no grid has gaps in month_id.
flags_by_grid <- split(mydata$flag, mydata$pg_id)
spans_by_grid <- lapply(flags_by_grid, function(grid_flags) {
  grid_runs <- rle(grid_flags)
  grid_runs$lengths[grid_runs$values == 1]
})
runs2 <- unlist(spans_by_grid, use.names = FALSE)

# NOTE(review): the three figures below were recorded from the earlier
# whole-column rle() pass; re-check them against the per-grid computation.
length(which(runs2 > 1)) # 978 conflict spans > 1
max(runs2) # 109 is the max conflict span
length(which(runs2 == 1)) # 3949 instances of 1 month conflict span

# One row per grid: total number of flagged (conflict) months.
aggdata <- aggregate(flag ~ pg_id, data = mydata, FUN = sum) # 10,677 obs
names(aggdata) <- c("pg_id", "conflict_count")

# Distribution of conflict-month counts across grids.
table(aggdata$conflict_count) # 9406 out of 10,677 grids that never experience ANY conflict

# Sort grids from fewest to most conflict months.
aggdata <- aggdata[order(aggdata$conflict_count), ]


# max_span: length of the longest run of consecutive conflict rows for each
# grid; grids with no conflict keep 0.
aggdata$max_span <- 0
has_conflict <- which(aggdata$conflict_count > 0)
u <- aggdata$pg_id[has_conflict]

# Guarded so that a data set with no conflict at all is a no-op; a
# 1:length(u) loop would iterate over c(1, 0) in that case.
if (length(u) > 0) {
  # Pull the rows for conflict grids once, rather than rescanning all of
  # mydata for every grid.
  conflict_rows <- mydata[mydata$pg_id %in% u, c("pg_id", "flag")]

  # Using u as the factor levels makes split() return the groups in the same
  # order as u, so results line up with has_conflict positionally.
  flags_by_grid <- split(
    conflict_rows$flag,
    factor(conflict_rows$pg_id, levels = u)
  )

  # Each grid here has at least one flagged row (conflict_count > 0), so the
  # vector passed to max() is never empty.
  longest_span <- vapply(flags_by_grid, function(grid_flags) {
    grid_runs <- rle(grid_flags)
    max(grid_runs$lengths[grid_runs$values == 1])
  }, numeric(1))

  aggdata$max_span[has_conflict] <- unname(longest_span)
}

table(aggdata$max_span)

# Grid-level output: pg_id, conflict_count, max_span.
write.csv(aggdata, "data072820/grid_conflict_count.csv", row.names = FALSE)








